
********************************************************************************
********************************************************************************
*************************** THIS IS DO FILE 4 **********************************
******* THIS FILE GENERATES THE CONDITIONAL STOP PROBABILITIES *****************
********************************************************************************
********************************************************************************

*Version: 2023 March 29


* --- Output folders and logging ---------------------------------------------
* Stata's -mkdir- does not create intermediate directories, so the parent log
* folder is created first. Without it, the nested -mkdir- below fails silently
* under -capture- on a fresh output tree and -log using- then aborts.
* -capture- also swallows the "already exists" error on reruns.
cap mkdir `"$output/log"'
cap mkdir `"$output/log/4_stop_probabilities"'
cap mkdir `"$output/temp"'

* Close any log that is still open, then start this file's own log.
cap log close 
log using `"$output/log/4_stop_probabilities/4_stop_probabilities.smcl"', replace 

di "This run uses code version from 2023/03/29"

* Maximum number of line items a visit may have to stay in the sample
* (used as the dimensionality limit inside the period loop).
local max_proc_n 5

* For each period x = 0,...,4: restrict the line-item data to the items flagged
* R_x == 1 (the set S), collapse to one row per visit, and compute, per cell,
* the share of visits with no line item flagged in period x+1 (the stopping
* probability). One temp file is saved per period.
forvalues x = 0/4 {

	use `"$output/data/$line_file"', clear

	local y = `x' + 1		// index of the next period

	* R_0 is constructed here rather than read from the data: a visit enters
	* period 0 only if at least one of its line items has Y_0 == 0 (a denial).
	* It is only needed when x == 0, so the full-data sort is skipped otherwise.
	if `x' == 0 {
		bys visit_billing_id: gegen min_Y_0 = min(Y_0)
		gen R_0 = (min_Y_0 == 0)
	}

	* Select sample: keep only the observations that identify the set S, so
	* that summing over the items of a visit is equivalent to summing over S.
	keep if R_`x' == 1

	*************************************************************
	*** Calculate stop probabilities for different subsets ***
	*************************************************************

	* n_S: number of line items of the visit in S (dimensionality of S).
	* Visits above the dimensionality limit are dropped as a whole, since n_S
	* is constant within visit.
	bys visit_billing_id: gen n_S = _N
	keep if n_S <= `max_proc_n'

	* next_claim == 1 if any item of the visit is flagged in the NEXT period;
	* visits whose R_`y' is missing on every row count as zero.
	bys visit_billing_id: gegen next_claim = max(R_`y')
	replace next_claim = 0 if next_claim == .

	* Amount of set S: sum of line item values over the visit.
	bys visit_billing_id: gegen amt_S = sum(line_item_value)

	* Amount and size of set Q, i.e. the items with Y_`x' == 0 (denied again).
	* tmp_amt_Q is only filled on the Q rows, so the visit-level max spreads it
	* to every row of the visit; amt_Q stays missing when the visit has no Q row.
	bys visit_billing_id: gegen tmp_amt_Q = sum(line_item_value) if Y_`x' == 0
	gen tmp_n_Q = (Y_`x' == 0)
	bys visit_billing_id: gegen amt_Q = max(tmp_amt_Q)
	bys visit_billing_id: gegen n_Q = sum(tmp_n_Q)

	* Keep one observation per visit.
	* NOTE(review): assumes the cell variables below are constant within a
	* visit, since which row survives is not pinned down - confirm.
	bys visit_billing_id: keep if _n == 1

	* Bin the amounts in steps of 50 over [0, 200). -egen cut- returns missing
	* outside that range and for missing input, so bin 200 collects amounts of
	* 200 or more, any negative amount, and (for Q) visits with missing amt_Q.
	egen bin_S = cut(amt_S) , at(0(50)200)
	egen bin_Q = cut(amt_Q) , at(0(50)200)
	replace bin_S = 200 if bin_S == . 
	replace bin_Q = 200 if bin_Q == .

	* Variables that define a cell.
	local cell state pay_type_pooled n_Q n_S bin_S bin_Q visit_code_cat size_indicator

	* Share of visits in the cell with a claim in the next period, turned into
	* the probability that we DO NOT observe one, i.e. the stopping probability.
	bys `cell': gegen p_stop = mean(next_claim)
	replace p_stop = 1 - p_stop

	* Number of visits used to compute the stopping probability of the cell.
	bys `cell': gen observations = _N

	* Keep one observation per cell.
	bys `cell': keep if _n == 1

	keep `cell' p_stop observations

	save `"$output/temp/stop_probabilities_`x'.dta"', replace 

}


* Pool the period-specific files and reduce them to one row per cell, where
* the cell's stopping probability is the average over periods weighted by the
* number of observations behind each period's estimate.

local cell_keys state pay_type_pooled n_Q n_S bin_S bin_Q visit_code_cat size_indicator

* Stack periods 0 through 4.
use `"$output/temp/stop_probabilities_0.dta"', clear
forvalues p = 1/4 {
	append using `"$output/temp/stop_probabilities_`p'.dta"'
}

* Observation-weighted mean of p_stop and total observation count per cell.
bys `cell_keys': gegen mean_p_stop = mean(p_stop) [aweight=observations]
bys `cell_keys': gegen total_observations = sum(observations)

* Drop the per-period columns; the pooled mean takes over the name p_stop.
keep `cell_keys' mean_p_stop total_observations
rename mean_p_stop p_stop

* Every row of a cell now carries the same values, so keep a single one.
bys `cell_keys': keep if _n == 1

save `"$output/temp/stop_probabilities_weighted.dta"', replace 

* Remove the per-period intermediate files.
forvalues p = 0/4 {
	erase `"$output/temp/stop_probabilities_`p'.dta"'
}

log close 

